package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;

import java.text.NumberFormat;
import java.io.PrintStream;
import java.io.IOException;
import java.io.File;
import java.util.Collection;

import java.util.Comparator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;

/**
 * Basic tool and API to check the health of an index and
 * write a new segments file that removes reference to
 * problematic segments.
 * 
 * <p>As this tool checks every byte in the index, on a large
 * index it can take quite a long time to run.
 *
 * <p><b>WARNING</b>: please make a complete backup of your
 * index before using this tool to fix your index!
 *
 * @lucene.experimental
 */
public class CheckIndex {

    private PrintStream infoStream;
    private Directory dir;

    /**
     * Returned from {@link #checkIndex()} detailing the health and status of the index.
     *
     * @lucene.experimental
     **/

    public static class Status {

        /** True if no problems were found with the index. */
        public boolean clean;

        /** True if we were unable to locate and load the segments_N file. */
        public boolean missingSegments;

        /** True if we were unable to open the segments_N file. */
        public boolean cantOpenSegments;

        /** True if we were unable to read the version number from segments_N file. */
        public boolean missingSegmentVersion;

        /** Name of latest segments_N file in the index. */
        public String segmentsFileName;

        /** Number of segments in the index. */
        public int numSegments;

        /** String description of the version of the index. */
        public String segmentFormat;

        /** Empty unless you passed specific segments list to check as optional 3rd argument.
         *  @see CheckIndex#checkIndex(List) */
        public List<String> segmentsChecked = new ArrayList<String>();

        /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
        public boolean toolOutOfDate;

        /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
        public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>();

        /** Directory index is in. */
        public Directory dir;

        /** 
         * SegmentInfos instance containing only segments that
         * had no problems (this is used with the {@link CheckIndex#fixIndex} 
         * method to repair the index). 
         */
        SegmentInfos newSegments;

        /** How many documents will be lost to bad segments. */
        public int totLoseDocCount;

        /** How many bad segments were found. */
        public int numBadSegments;

        /** True if we checked only specific segments
         *  ({@link #checkIndex(List)} was called with a
         *  non-null argument). */
        public boolean partial;

        /** The greatest segment name. */
        public int maxSegmentName;

        /** Whether the SegmentInfos.counter is greater than any of the segments' names. */
        public boolean validCounter;

        /** Holds the userData of the last commit in the index */
        public Map<String, String> userData;

        /** Holds the status of each segment in the index.
         *  See {@link #segmentInfos}.
         *
         * <p><b>WARNING</b>: this API is new and experimental and is
         * subject to suddenly change in the next release.
         */
        public static class SegmentInfoStatus {
            /** Name of the segment. */
            public String name;

            /** Document count (does not take deletions into account). */
            public int docCount;

            /** True if segment is compound file format. */
            public boolean compound;

            /** Number of files referenced by this segment. */
            public int numFiles;

            /** Net size (MB) of the files referenced by this
             *  segment. */
            public double sizeMB;

            /** Doc store offset, if this segment shares the doc
             *  store files (stored fields and term vectors) with
             *  other segments.  This is -1 if it does not share. */
            public int docStoreOffset = -1;

            /** String of the shared doc store segment, or null if
             *  this segment does not share the doc store files. */
            public String docStoreSegment;

            /** True if the shared doc store files are compound file
             *  format. */
            public boolean docStoreCompoundFile;

            /** True if this segment has pending deletions. */
            public boolean hasDeletions;

            /** Name of the current deletions file name. */
            public String deletionsFileName;

            /** Number of deleted documents. */
            public int numDeleted;

            /** True if we were able to open a SegmentReader on this
             *  segment. */
            public boolean openReaderPassed;

            /** Number of fields in this segment. */
            int numFields;

            /** True if at least one of the fields in this segment
             *  has position data
             *  @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
            public boolean hasProx;

            /** Map that includes certain
             *  debugging details that IndexWriter records into
             *  each segment it creates */
            public Map<String, String> diagnostics;

            /** Status for testing of field norms (null if field norms could not be tested). */
            public FieldNormStatus fieldNormStatus;

            /** Status for testing of indexed terms (null if indexed terms could not be tested). */
            public TermIndexStatus termIndexStatus;

            /** Status for testing of stored fields (null if stored fields could not be tested). */
            public StoredFieldStatus storedFieldStatus;

            /** Status for testing of term vectors (null if term vectors could not be tested). */
            public TermVectorStatus termVectorStatus;
        }

        /**
         * Status from testing field norms.
         */
        public static final class FieldNormStatus {
            /** Number of fields successfully tested */
            public long totFields = 0L;

            /** Exception thrown during field norm test (null on success) */
            public Throwable error = null;
        }

        /**
         * Status from testing term index.
         */
        public static final class TermIndexStatus {
            /** Total term count */
            public long termCount = 0L;

            /** Total frequency across all terms. */
            public long totFreq = 0L;

            /** Total number of positions. */
            public long totPos = 0L;

            /** Exception thrown during term index test (null on success) */
            public Throwable error = null;
        }

        /**
         * Status from testing stored fields.
         */
        public static final class StoredFieldStatus {

            /** Number of documents tested. */
            public int docCount = 0;

            /** Total number of stored fields tested. */
            public long totFields = 0;

            /** Exception thrown during stored fields test (null on success) */
            public Throwable error = null;
        }

        /**
         * Status from testing term vectors.
         */
        public static final class TermVectorStatus {

            /** Number of documents tested. */
            public int docCount = 0;

            /** Total number of term vectors tested. */
            public long totVectors = 0;

            /** Exception thrown during term vector test (null on success) */
            public Throwable error = null;
        }
    }

    /** Creates a CheckIndex inspecting the given directory.
     *  No messages are printed until {@link #setInfoStream} is called. */
    public CheckIndex(Directory dir) {
        infoStream = null;
        this.dir = dir;
    }

    /** Sets the stream that receives progress and diagnostic
     *  messages.  Pass null to suppress all output. */
    public void setInfoStream(PrintStream out) {
        this.infoStream = out;
    }

    /** Prints one line to the configured infoStream; a no-op when
     *  no stream has been set. */
    private void msg(String msg) {
        if (infoStream == null) {
            return;
        }
        infoStream.println(msg);
    }

    /** SegmentTermDocs subclass that counts how many deleted docs
     *  are skipped while iterating the postings of a term; used by
     *  testTermIndex to reconcile docFreq against live docs. */
    private static class MySegmentTermDocs extends SegmentTermDocs {

        /** Deleted docs skipped since the last {@link #seek(Term)}. */
        int delCount;

        MySegmentTermDocs(SegmentReader parent) {
            super(parent);
        }

        /** Positions on a new term and resets the deletion counter. */
        @Override
        public void seek(Term term) throws IOException {
            super.seek(term);
            delCount = 0;
        }

        /** Callback from the superclass each time a deleted doc is skipped. */
        @Override
        protected void skippingDoc() throws IOException {
            delCount++;
        }
    }

    /** Returns a {@link Status} instance detailing
     *  the state of the index.
     *
     *  <p>As this method checks every byte in the index, on a large
     *  index it can take quite a long time to run.
     *
     *  <p><b>WARNING</b>: make sure
     *  you only call this when the index is not opened by any
     *  writer. */
    public Status checkIndex() throws IOException {
        // A null segment list means "check all segments".
        final List<String> allSegments = null;
        return checkIndex(allSegments);
    }

    /** Returns a {@link Status} instance detailing
     *  the state of the index.
     * 
     *  @param onlySegments list of specific segment names to check
     *
     *  <p>As this method checks every byte in the specified
     *  segments, on a large index it can take quite a long
     *  time to run.
     *
     *  <p><b>WARNING</b>: make sure
     *  you only call this when the index is not opened by any
     *  writer. */
    public Status checkIndex(List<String> onlySegments) throws IOException {
        NumberFormat nf = NumberFormat.getInstance();
        SegmentInfos sis = new SegmentInfos();
        Status result = new Status();
        result.dir = dir;
        // If we cannot even read segments_N the index is unusable; report and bail out.
        try {
            sis.read(dir);
        } catch (Throwable t) {
            msg("ERROR: could not read any segments file in directory");
            result.missingSegments = true;
            if (infoStream != null)
                t.printStackTrace(infoStream);
            return result;
        }

        // find the oldest and newest segment versions
        String oldest = Integer.toString(Integer.MAX_VALUE),
                        newest = Integer.toString(Integer.MIN_VALUE);
        String oldSegs = null;
        boolean foundNonNullVersion = false;
        Comparator<String> versionComparator = StringHelper.getVersionComparator();
        for (SegmentInfo si : sis) {
            String version = si.getVersion();
            if (version == null) {
                // pre-3.1 segment
                oldSegs = "pre-3.1";
            } else if (version.equals("2.x")) {
                // an old segment that was 'touched' by 3.1+ code
                oldSegs = "2.x";
            } else {
                foundNonNullVersion = true;
                if (versionComparator.compare(version, oldest) < 0) {
                    oldest = version;
                }
                if (versionComparator.compare(version, newest) > 0) {
                    newest = version;
                }
            }
        }

        final int numSegments = sis.size();
        final String segmentsFileName = sis.getCurrentSegmentFileName();
        IndexInput input = null;
        try {
            input = dir.openInput(segmentsFileName);
        } catch (Throwable t) {
            msg("ERROR: could not open segments file in directory");
            if (infoStream != null)
                t.printStackTrace(infoStream);
            result.cantOpenSegments = true;
            return result;
        }
        int format = 0;
        try {
            format = input.readInt();
        } catch (Throwable t) {
            msg("ERROR: could not read segment file version in directory");
            if (infoStream != null)
                t.printStackTrace(infoStream);
            result.missingSegmentVersion = true;
            return result;
        } finally {
            if (input != null)
                input.close();
        }

        String sFormat = "";
        boolean skip = false;

        // Map the raw format int onto a human-readable description.  This must
        // be one single if/else-if chain: the format constants are mutually
        // exclusive, and only a value matching none of them should reach the
        // trailing "Lucene 1.3 or prior" / "newer than this tool" branches.
        // (Previously the first test was a stand-alone `if`, so a pre-2.1
        // FORMAT index fell through and sFormat was overwritten by the final
        // else branch.)
        if (format == SegmentInfos.FORMAT)
            sFormat = "FORMAT [Lucene Pre-2.1]";
        else if (format == SegmentInfos.FORMAT_LOCKLESS)
            sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
        else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
            sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
        else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
            sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
        else if (format == SegmentInfos.FORMAT_CHECKSUM)
            sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
        else if (format == SegmentInfos.FORMAT_DEL_COUNT)
            sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
        else if (format == SegmentInfos.FORMAT_HAS_PROX)
            sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
        else if (format == SegmentInfos.FORMAT_USER_DATA)
            sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
        else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
            sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
        else if (format == SegmentInfos.FORMAT_HAS_VECTORS)
            sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]";
        else if (format == SegmentInfos.FORMAT_3_1)
            sFormat = "FORMAT_3_1 [Lucene 3.1+]";
        else if (format == SegmentInfos.CURRENT_FORMAT)
            throw new RuntimeException("BUG: You should update this tool!");
        else if (format < SegmentInfos.CURRENT_FORMAT) {
            // Formats are negative and decrease over time; a value below
            // CURRENT_FORMAT was written by a newer Lucene than this tool.
            sFormat = "int=" + format + " [newer version of Lucene than this tool]";
            skip = true;
        } else {
            sFormat = format + " [Lucene 1.3 or prior]";
        }

        result.segmentsFileName = segmentsFileName;
        result.numSegments = numSegments;
        result.segmentFormat = sFormat;
        result.userData = sis.getUserData();
        String userDataString;
        if (sis.getUserData().size() > 0) {
            userDataString = " userData=" + sis.getUserData();
        } else {
            userDataString = "";
        }

        String versionString = null;
        if (oldSegs != null) {
            if (foundNonNullVersion) {
                versionString = "versions=[" + oldSegs + " .. " + newest + "]";
            } else {
                versionString = "version=" + oldSegs;
            }
        } else {
            versionString = oldest.equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
        }

        msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " format=" + sFormat + userDataString);

        if (onlySegments != null) {
            result.partial = true;
            if (infoStream != null)
                infoStream.print("\nChecking only these segments:");
            for (String s : onlySegments) {
                if (infoStream != null)
                    infoStream.print(" " + s);
            }
            result.segmentsChecked.addAll(onlySegments);
            msg(":");
        }

        if (skip) {
            msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
            result.toolOutOfDate = true;
            return result;
        }

        // newSegments collects only the segments that pass every test; it is
        // what fixIndex() would write back out.
        result.newSegments = (SegmentInfos) sis.clone();
        result.newSegments.clear();
        result.maxSegmentName = -1;

        for (int i = 0; i < numSegments; i++) {
            final SegmentInfo info = sis.info(i);
            // Segment names are "_<base36>"; track the max so we can validate
            // SegmentInfos.counter below.
            int segmentName = Integer.parseInt(info.name.substring(1), Character.MAX_RADIX);
            if (segmentName > result.maxSegmentName) {
                result.maxSegmentName = segmentName;
            }
            if (onlySegments != null && !onlySegments.contains(info.name))
                continue;
            Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
            result.segmentInfos.add(segInfoStat);
            msg("  " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
            segInfoStat.name = info.name;
            segInfoStat.docCount = info.docCount;

            // Assume the whole segment is lost until the reader opens OK.
            int toLoseDocCount = info.docCount;

            SegmentReader reader = null;

            try {
                msg("    compound=" + info.getUseCompoundFile());
                segInfoStat.compound = info.getUseCompoundFile();
                msg("    hasProx=" + info.getHasProx());
                segInfoStat.hasProx = info.getHasProx();
                msg("    numFiles=" + info.files().size());
                segInfoStat.numFiles = info.files().size();
                segInfoStat.sizeMB = info.sizeInBytes(true) / (1024. * 1024.);
                msg("    size (MB)=" + nf.format(segInfoStat.sizeMB));
                Map<String, String> diagnostics = info.getDiagnostics();
                segInfoStat.diagnostics = diagnostics;
                if (diagnostics.size() > 0) {
                    msg("    diagnostics = " + diagnostics);
                }

                final int docStoreOffset = info.getDocStoreOffset();
                if (docStoreOffset != -1) {
                    msg("    docStoreOffset=" + docStoreOffset);
                    segInfoStat.docStoreOffset = docStoreOffset;
                    msg("    docStoreSegment=" + info.getDocStoreSegment());
                    segInfoStat.docStoreSegment = info.getDocStoreSegment();
                    msg("    docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
                    segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
                }
                final String delFileName = info.getDelFileName();
                if (delFileName == null) {
                    msg("    no deletions");
                    segInfoStat.hasDeletions = false;
                } else {
                    msg("    has deletions [delFileName=" + delFileName + "]");
                    segInfoStat.hasDeletions = true;
                    segInfoStat.deletionsFileName = delFileName;
                }
                if (infoStream != null)
                    infoStream.print("    test: open reader.........");
                reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

                segInfoStat.openReaderPassed = true;

                // Cross-check the three sources of deletion counts: the
                // SegmentInfo, the deleted-docs bit set, and numDocs().
                final int numDocs = reader.numDocs();
                toLoseDocCount = numDocs;
                if (reader.hasDeletions()) {
                    if (reader.deletedDocs.count() != info.getDelCount()) {
                        throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
                    }
                    if (reader.deletedDocs.count() > reader.maxDoc()) {
                        throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
                    }
                    if (info.docCount - numDocs != info.getDelCount()) {
                        throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
                    }
                    segInfoStat.numDeleted = info.docCount - numDocs;
                    msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
                } else {
                    if (info.getDelCount() != 0) {
                        throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
                    }
                    msg("OK");
                }
                if (reader.maxDoc() != info.docCount)
                    throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);

                // Test getFieldNames()
                if (infoStream != null) {
                    infoStream.print("    test: fields..............");
                }
                Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
                msg("OK [" + fieldNames.size() + " fields]");
                segInfoStat.numFields = fieldNames.size();

                // Test Field Norms
                segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);

                // Test the Term Index
                segInfoStat.termIndexStatus = testTermIndex(info, reader);

                // Test Stored Fields
                segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);

                // Test Term Vectors
                segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);

                // Rethrow the first exception we encountered
                //  This will cause stats for failed segments to be incremented properly
                if (segInfoStat.fieldNormStatus.error != null) {
                    throw new RuntimeException("Field Norm test failed");
                } else if (segInfoStat.termIndexStatus.error != null) {
                    throw new RuntimeException("Term Index test failed");
                } else if (segInfoStat.storedFieldStatus.error != null) {
                    throw new RuntimeException("Stored Field test failed");
                } else if (segInfoStat.termVectorStatus.error != null) {
                    throw new RuntimeException("Term Vector test failed");
                }

                msg("");

            } catch (Throwable t) {
                // Any failure marks the segment bad; it is excluded from
                // newSegments and its docs counted as lost.
                msg("FAILED");
                String comment;
                comment = "fixIndex() would remove reference to this segment";
                msg("    WARNING: " + comment + "; full exception:");
                if (infoStream != null)
                    t.printStackTrace(infoStream);
                msg("");
                result.totLoseDocCount += toLoseDocCount;
                result.numBadSegments++;
                continue;
            } finally {
                if (reader != null)
                    reader.close();
            }

            // Keeper
            result.newSegments.add((SegmentInfo) info.clone());
        }

        if (0 == result.numBadSegments) {
            result.clean = true;
        } else
            msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");

        // The next-segment counter must exceed every existing segment name,
        // otherwise a future segment could collide with an existing one.
        if (!(result.validCounter = (result.maxSegmentName < sis.counter))) {
            result.clean = false;
            result.newSegments.counter = result.maxSegmentName + 1;
            msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
        }

        if (result.clean) {
            msg("No problems were detected with this index.\n");
        }

        return result;
    }

    /**
     * Test field norms.
     */
    /**
     * Verifies that norms can be read for every field that has them,
     * reading each into a shared per-document buffer.  Any failure is
     * captured in the returned status' {@code error} field rather than
     * propagated.
     */
    private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
        final Status.FieldNormStatus status = new Status.FieldNormStatus();

        try {
            if (infoStream != null) {
                infoStream.print("    test: field norms.........");
            }
            // One norm byte per document; reused across fields.
            final byte[] norms = new byte[reader.maxDoc()];
            for (final String field : fieldNames) {
                if (!reader.hasNorms(field)) {
                    continue;
                }
                reader.norms(field, norms, 0);
                status.totFields++;
            }

            msg("OK [" + status.totFields + " fields]");
        } catch (Throwable e) {
            msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
            status.error = e;
            if (infoStream != null) {
                e.printStackTrace(infoStream);
            }
        }

        return status;
    }

    /**
     * Test the term index.
     */
    /**
     * Exhaustively walks the term dictionary and postings of one segment,
     * validating doc/freq/position invariants, skipTo() behavior, and that
     * docFreq reconciles with live + deleted docs.  Any failure is captured
     * in the returned status' {@code error} field rather than propagated.
     */
    private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
        final Status.TermIndexStatus status = new Status.TermIndexStatus();

        final IndexSearcher is = new IndexSearcher(reader);

        try {
            if (infoStream != null) {
                infoStream.print("    test: terms, freq, prox...");
            }

            final TermEnum termEnum = reader.terms();
            final TermPositions termPositions = reader.termPositions();

            // Used only to count up # deleted docs for this term
            final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);

            final int maxDoc = reader.maxDoc();
            Term lastTerm = null;
            while (termEnum.next()) {
                status.termCount++;
                final Term term = termEnum.term();
                lastTerm = term;

                final int docFreq = termEnum.docFreq();
                if (docFreq <= 0) {
                    throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
                }
                termPositions.seek(term);
                // Docs must appear in strictly increasing order, within [0, maxDoc).
                int lastDoc = -1;
                // freq0 counts the live docs actually seen for this term.
                int freq0 = 0;
                status.totFreq += docFreq;
                while (termPositions.next()) {
                    freq0++;
                    final int doc = termPositions.doc();
                    final int freq = termPositions.freq();
                    if (doc <= lastDoc)
                        throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
                    if (doc >= maxDoc)
                        throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);

                    lastDoc = doc;
                    if (freq <= 0)
                        throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");

                    // Positions must be non-decreasing; -1 is allowed (positions omitted).
                    int lastPos = -1;
                    status.totPos += freq;
                    for (int j = 0; j < freq; j++) {
                        final int pos = termPositions.nextPosition();
                        if (pos < -1)
                            throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
                        if (pos < lastPos)
                            throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
                        lastPos = pos;
                    }
                }

                // Test skipping: re-seek and skipTo() seven evenly spaced
                // target docIDs, checking each landing point and the doc after it.
                for (int idx = 0; idx < 7; idx++) {
                    final int skipDocID = (int) (((idx + 1) * (long) maxDoc) / 8);
                    termPositions.seek(term);
                    if (!termPositions.skipTo(skipDocID)) {
                        break;
                    } else {

                        final int docID = termPositions.doc();
                        if (docID < skipDocID) {
                            throw new RuntimeException("term " + term + ": skipTo(docID=" + skipDocID + ") returned docID=" + docID);
                        }
                        final int freq = termPositions.freq();
                        if (freq <= 0) {
                            throw new RuntimeException("termFreq " + freq + " is out of bounds");
                        }
                        int lastPosition = -1;
                        for (int posUpto = 0; posUpto < freq; posUpto++) {
                            final int pos = termPositions.nextPosition();
                            if (pos < 0) {
                                throw new RuntimeException("position " + pos + " is out of bounds");
                            }
                            // TODO: we should assert when all pos == 0 that positions are actually omitted
                            if (pos < lastPosition) {
                                throw new RuntimeException("position " + pos + " is < lastPosition " + lastPosition);
                            }
                            lastPosition = pos;
                        }

                        if (!termPositions.next()) {
                            break;
                        }
                        final int nextDocID = termPositions.doc();
                        if (nextDocID <= docID) {
                            throw new RuntimeException("term " + term + ": skipTo(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
                        }
                    }
                }

                // Now count how many deleted docs occurred in
                // this term:
                final int delCount;
                if (reader.hasDeletions()) {
                    myTermDocs.seek(term);
                    while (myTermDocs.next()) {
                    }
                    delCount = myTermDocs.delCount;
                } else {
                    delCount = 0;
                }

                // docFreq includes deleted docs, so live + deleted must match it.
                if (freq0 + delCount != docFreq) {
                    throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
                }
            }

            // Test search on last term:
            if (lastTerm != null) {
                is.search(new TermQuery(lastTerm), 1);
            }

            try {
                // Cross-check the enumerated term count against the reader's
                // own unique-term count, when the reader supports it.
                long uniqueTermCountAllFields = reader.getUniqueTermCount();
                if (status.termCount != uniqueTermCountAllFields) {
                    throw new RuntimeException("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.termCount));
                }
            } catch (UnsupportedOperationException ex) {
                // not supported by this reader implementation; skip the cross-check
            }

            msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");

        } catch (Throwable e) {
            msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
            status.error = e;
            if (infoStream != null) {
                e.printStackTrace(infoStream);
            }
        }

        return status;
    }

    /**
     * Test stored fields for a segment.
     */
    /**
     * Visits the stored fields of every live document in the segment and
     * cross-checks the visited count against the reader's numDocs().  Any
     * failure is captured in the returned status' {@code error} field
     * rather than propagated.
     */
    private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
        final Status.StoredFieldStatus status = new Status.StoredFieldStatus();

        try {
            if (infoStream != null) {
                infoStream.print("    test: stored fields.......");
            }

            // Scan stored fields for all documents
            for (int j = 0; j < info.docCount; ++j) {
                if (!reader.isDeleted(j)) {
                    status.docCount++;
                    Document doc = reader.document(j);
                    status.totFields += doc.getFields().size();
                }
            }

            // Validate docCount.  (The message previously printed
            // status.docCount on both sides, hiding the actual mismatch.)
            if (status.docCount != reader.numDocs()) {
                throw new RuntimeException("docCount=" + reader.numDocs() + " but saw " + status.docCount + " undeleted docs");
            }

            msg("OK [" + status.totFields + " total field count; avg " + format.format((((float) status.totFields) / status.docCount)) + " fields per doc]");
        } catch (Throwable e) {
            msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
            status.error = e;
            if (infoStream != null) {
                e.printStackTrace(infoStream);
            }
        }

        return status;
    }

    /**
     * Test term vectors for a segment.
     *
     * <p>Walks every live document, fetching its term frequency vectors and
     * accumulating document and vector totals into the returned
     * {@link Status.TermVectorStatus}.  Problems are captured on
     * {@code status.error} instead of being propagated, so the overall
     * check can continue with other segments.
     */
    private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
        final Status.TermVectorStatus status = new Status.TermVectorStatus();

        try {
            if (infoStream != null) {
                infoStream.print("    test: term vectors........");
            }

            final int maxDoc = info.docCount;
            for (int docID = 0; docID < maxDoc; docID++) {
                if (reader.isDeleted(docID)) {
                    // Only live documents contribute to the totals.
                    continue;
                }
                status.docCount++;
                final TermFreqVector[] vectors = reader.getTermFreqVectors(docID);
                if (vectors != null) {
                    status.totVectors += vectors.length;
                }
            }

            msg("OK [" + status.totVectors + " total vector count; avg " + format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
        } catch (Throwable e) {
            msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
            status.error = e;
            if (infoStream != null) {
                e.printStackTrace(infoStream);
            }
        }

        return status;
    }

    /**
     * Repairs the index using a previously returned result from
     * {@link #checkIndex}.  Note that this does not remove any of the
     * unreferenced files after it's done; you must separately open an
     * {@link IndexWriter}, which deletes unreferenced files when it's
     * created.
     *
     * <p><b>WARNING</b>: this writes a new segments file into the index,
     * effectively removing all documents in broken segments from the
     * index.  BE CAREFUL.
     *
     * <p><b>WARNING</b>: Make sure you only call this when the index is
     * not opened by any writer.
     *
     * @throws IllegalArgumentException if {@code result} came from a
     *         partial check (only some segments were inspected)
     */
    public void fixIndex(Status result) throws IOException {
        if (result.partial) {
            throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
        }
        // Mark the repaired SegmentInfos as changed, then commit it as a
        // new segments_N file, dropping references to broken segments.
        result.newSegments.changed();
        result.newSegments.commit(result.dir);
    }

    // Set as a side effect of testAsserts(); stays false unless Java
    // assertions are enabled for this class.
    private static boolean assertsOn;

    // Always returns true; the useful work is the side effect of setting
    // assertsOn.  Because it is only ever invoked from inside an assert
    // statement, its body runs only when assertions are enabled (-ea).
    private static boolean testAsserts() {
        assertsOn = true;
        return true;
    }

    // Reports whether Java assertions are enabled: the assert below calls
    // testAsserts() (flipping assertsOn to true) only when -ea is in
    // effect; otherwise assertsOn keeps its default value of false.
    private static boolean assertsOn() {
        assert testAsserts();
        return assertsOn;
    }

    /** Command-line interface to check and fix an index.
    
    <p>
    Run it like this:
    <pre>
    java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
    </pre>
    <ul>
    <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments
    
    <li><code>-segment X</code>: only check the specified
    segment(s).  This can be specified multiple times,
    to check more than one segment, eg <code>-segment _2
    -segment _a</code>.  You can't use this with the -fix
    option.
    </ul>
    
    <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
                       documents (perhaps many) to be permanently removed from the index.  Always make
                       a backup copy of your index before running this!  Do not run this tool on an index
                       that is actively being written to.  You have been warned!
    
    <p>                Run without -fix, this tool will open the index, report version information
                       and report any exceptions it hits and what action it would take if -fix were
                       specified.  With -fix, this tool will remove any segments that have issues and
                       write a new segments_N file.  This means all documents contained in the affected
                       segments will be removed.
    
    <p>
                       This tool exits with exit code 1 if the index cannot be opened or has any
                       corruption, else 0.
     */
    public static void main(String[] args) throws IOException, InterruptedException {

        // --- Parse command-line arguments ---
        boolean doFix = false;
        List<String> onlySegments = new ArrayList<String>();
        String indexPath = null;
        int i = 0;
        while (i < args.length) {
            if (args[i].equals("-fix")) {
                doFix = true;
                i++;
            } else if (args[i].equals("-segment")) {
                if (i == args.length - 1) {
                    System.out.println("ERROR: missing name for -segment option");
                    System.exit(1);
                }
                onlySegments.add(args[i + 1]);
                i += 2;
            } else {
                // First bare argument is the index path; a second one is an error.
                if (indexPath != null) {
                    System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
                    System.exit(1);
                }
                indexPath = args[i];
                i++;
            }
        }

        if (indexPath == null) {
            System.out.println("\nERROR: index path not specified");
            System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + "  -fix: actually write a new segments_N file, removing any problematic segments\n" + "  -segment X: only check the specified segments.  This can be specified multiple\n" + "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + "              You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index.  Always make\n" + "a backup copy of your index before running this!  Do not run this tool on an index\n" + "that is actively being written to.  You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified.  With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file.  This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n");
            System.exit(1);
        }

        // Warn when assertions are disabled: the check is less thorough.
        if (!assertsOn())
            System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");

        // An empty segment list means "check everything"; -fix is
        // incompatible with restricting the check to specific segments.
        if (onlySegments.isEmpty())
            onlySegments = null;
        else if (doFix) {
            System.out.println("ERROR: cannot specify both -fix and -segment");
            System.exit(1);
        }

        // --- Open the index directory ---
        System.out.println("\nOpening index @ " + indexPath + "\n");
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(indexPath));
        } catch (Throwable t) {
            System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
            t.printStackTrace(System.out);
            System.exit(1);
        }

        // --- Run the check, optionally fix, and report ---
        CheckIndex checker = new CheckIndex(dir);
        checker.setInfoStream(System.out);

        Status result = checker.checkIndex(onlySegments);
        if (result.missingSegments) {
            System.exit(1);
        }

        if (!result.clean) {
            if (!doFix) {
                System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
            } else {
                System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
                System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
                // Five-second countdown so the user can abort before docs are dropped.
                for (int s = 0; s < 5; s++) {
                    Thread.sleep(1000);
                    System.out.println("  " + (5 - s) + "...");
                }
                System.out.println("Writing...");
                checker.fixIndex(result);
                System.out.println("OK");
                System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
            }
        }
        System.out.println("");

        // Exit 0 only when the index was found fully clean.
        System.exit(result.clean ? 0 : 1);
    }
}
